/*
 * M_CAT_F32_44 - concatenate (multiply) two 4x4 matrices: *ptmRet = (*ptmM0) * (*ptmM1).
 *
 * Output row i is the linear combination of the four rows of ptmM1, weighted by
 * the four lanes of row i of ptmM0:
 *
 *     ret[i] = ( m0[i][0]*m1[0] + m0[i][1]*m1[1] ) + ( m0[i][2]*m1[2] + m0[i][3]*m1[3] )
 *
 * The pairwise grouping of the additions is part of the numeric contract and
 * must not be re-associated.
 *
 * Every input row is read, and every output row computed, before the first
 * store to ptmRet, so ptmRet may alias ptmM0 and/or ptmM1.
 */
TgVOID M_CAT_F32_44( PCU_TgMAT_F32_44 ptmRet, CPCU_TgMAT_F32_44 ptmM0, CPCU_TgMAT_F32_44 ptmM1 )
{
    /* Snapshot the right-hand rows once; they are reused for every output row. */
    const __m128 vRhs0 = ptmM1->m_atvRow[0].m_mData;
    const __m128 vRhs1 = ptmM1->m_atvRow[1].m_mData;
    const __m128 vRhs2 = ptmM1->m_atvRow[2].m_mData;
    const __m128 vRhs3 = ptmM1->m_atvRow[3].m_mData;
    __m128 avOut[4];
    int iRow;

    for (iRow = 0; iRow < 4; ++iRow)
    {
        const __m128 vLhs = ptmM0->m_atvRow[iRow].m_mData;

        /* Broadcast each lane of the left-hand row (0x00/0x55/0xAA/0xFF select
           lane 0/1/2/3 for all four result lanes) and scale the matching
           right-hand row by it. */
        const __m128 vScaled0 = _mm_mul_ps( _mm_shuffle_ps( vLhs, vLhs, 0x00 ), vRhs0 );
        const __m128 vScaled1 = _mm_mul_ps( _mm_shuffle_ps( vLhs, vLhs, 0x55 ), vRhs1 );
        const __m128 vScaled2 = _mm_mul_ps( _mm_shuffle_ps( vLhs, vLhs, 0xAA ), vRhs2 );
        const __m128 vScaled3 = _mm_mul_ps( _mm_shuffle_ps( vLhs, vLhs, 0xFF ), vRhs3 );

        avOut[iRow] = _mm_add_ps( _mm_add_ps( vScaled0, vScaled1 ), _mm_add_ps( vScaled2, vScaled3 ) );
    }

    /* Commit only after all four rows are final (keeps in-place use safe). */
    for (iRow = 0; iRow < 4; ++iRow)
    {
        ptmRet->m_atvRow[iRow].m_mData = avOut[iRow];
    }
}
/*
 * M_INV_DET_F32_44 - write four rows to *ptmRet computed from the rows of
 * *ptmM1 and the vector tvDet.
 *
 * Structure of the computation (as visible in this function):
 *   - Twelve "terms" are built.  Each term has the form
 *         shuffle(miC0 or miC1) * ( a*b - c*d ) / tvDet
 *     where a, b, c, d are shuffles of the input rows (or of mixes of them).
 *   - Each output row is the sum of three consecutive terms:
 *         row 0 = mi020 + ( mi030 + mi040 )
 *         row 1 = mi050 + ( mi060 + mi070 )
 *         row 2 = mi080 + ( mi090 + mi100 )
 *         row 3 = mi110 + ( mi120 + mi130 )
 *
 * NOTE(review): the shape (element * 2x2 difference-of-products, three per
 * entry, divided by tvDet) looks like an adjugate/determinant matrix inverse,
 * with tvDet presumably the determinant of *ptmM1 replicated in every lane
 * (the form M_DET_F32_44 returns) - confirm against callers.
 *
 * NOTE(review): the lane-ordering convention of _MM_PERM is defined outside
 * this function; the per-lane meaning of every shuffle below depends on it.
 *
 * tvDet is used directly as a divisor; there is no check for zero lanes here.
 *
 * All four rows of *ptmM1 are loaded into locals before any store to *ptmRet.
 *
 * Several shuffles are recomputed with identical operands (for example
 * mi013, mi025, mi045 and mi053 are the same expression).
 */
TgVOID M_INV_DET_F32_44( PCU_TgMAT_F32_44 ptmRet, C_TgVEC_M_F32_04 tvDet, CPCU_TgMAT_F32_44 ptmM1 )
{
/* Load the four input rows. */
const register __m128 miR0 = ptmM1->m_atvRow[0].m_mData;
const register __m128 miR1 = ptmM1->m_atvRow[1].m_mData;
const register __m128 miR2 = ptmM1->m_atvRow[2].m_mData;
const register __m128 miR3 = ptmM1->m_atvRow[3].m_mData;
/* Two-row mixes: _mm_shuffle_ps takes its two low result lanes from the first
   operand and its two high result lanes from the second operand. */
const register __m128 mi01 = _mm_shuffle_ps( miR0, miR1, _MM_PERM( 0, 1, 0, 1 ) );
const register __m128 mi02 = _mm_shuffle_ps( miR0, miR1, _MM_PERM( 2, 3, 2, 3 ) );
const register __m128 mi03 = _mm_shuffle_ps( miR2, miR3, _MM_PERM( 0, 1, 0, 1 ) );
const register __m128 mi04 = _mm_shuffle_ps( miR2, miR3, _MM_PERM( 2, 3, 2, 3 ) );
/* Further two-row mixes; these feed only the terms of output rows 2 and 3. */
const register __m128 mi05 = _mm_shuffle_ps( miR0, miR1, _MM_PERM( 0, 1, 2, 3 ) );
const register __m128 mi06 = _mm_shuffle_ps( miR1, miR0, _MM_PERM( 0, 1, 2, 3 ) );
const register __m128 mi07 = _mm_shuffle_ps( miR2, miR3, _MM_PERM( 0, 1, 2, 3 ) );
const register __m128 mi08 = _mm_shuffle_ps( miR3, miR2, _MM_PERM( 0, 1, 2, 3 ) );
/* miC0 / miC1 gather lanes from mi01 (rows 0,1) and mi03 (rows 2,3).
   NOTE(review): presumably columns 0 and 1 of the input matrix - confirm
   against the _MM_PERM definition.  miC1 is used only by output row 0;
   miC0 is used by output rows 1, 2 and 3. */
const register __m128 miC0 = _mm_shuffle_ps( mi01, mi03, _MM_PERM( 0, 2, 0, 2 ) );
const register __m128 miC1 = _mm_shuffle_ps( mi01, mi03, _MM_PERM( 1, 3, 1, 3 ) );
/* ---- Output row 0, term 1: mi020 = mi011 * ( mi012*mi013 - mi014*mi015 ) / tvDet ---- */
const register __m128 mi011 = _mm_shuffle_ps( miC1, miC1, _MM_PERM( 1, 0, 0, 0 ) );
const register __m128 mi013 = _mm_shuffle_ps( miR3, mi04, _MM_PERM( 3, 2, 3, 0 ) );
const register __m128 mi015 = _mm_shuffle_ps( miR3, mi04, _MM_PERM( 2, 3, 2, 1 ) );
const register __m128 mi012 = _mm_shuffle_ps( miR2, miR1, _MM_PERM( 2, 3, 2, 3 ) );
const register __m128 mi014 = _mm_shuffle_ps( miR2, miR1, _MM_PERM( 3, 2, 3, 2 ) );
const register __m128 mi016 = _mm_mul_ps( mi012, mi013 );
const register __m128 mi017 = _mm_mul_ps( mi014, mi015 );
const register __m128 mi018 = _mm_sub_ps( mi016, mi017 );
const register __m128 mi019 = _mm_mul_ps( mi011, mi018 );
const register __m128 mi020 = _mm_div_ps( mi019, tvDet );
/* ---- Output row 0, term 2: mi030 = mi021 * ( mi022*mi023 - mi024*mi025 ) / tvDet ---- */
const register __m128 mi021 = _mm_shuffle_ps( miC1, miC1, _MM_PERM( 2, 2, 1, 1 ) );
const register __m128 mi023 = _mm_shuffle_ps( miR3, mi04, _MM_PERM( 2, 3, 2, 1 ) );
const register __m128 mi025 = _mm_shuffle_ps( miR3, mi04, _MM_PERM( 3, 2, 3, 0 ) );
const register __m128 mi022 = _mm_shuffle_ps( mi02, miR0, _MM_PERM( 3, 0, 3, 2 ) );
const register __m128 mi024 = _mm_shuffle_ps( mi02, miR0, _MM_PERM( 2, 1, 2, 3 ) );
const register __m128 mi026 = _mm_mul_ps( mi022, mi023 );
const register __m128 mi027 = _mm_mul_ps( mi024, mi025 );
const register __m128 mi028 = _mm_sub_ps( mi026, mi027 );
const register __m128 mi029 = _mm_mul_ps( mi021, mi028 );
const register __m128 mi030 = _mm_div_ps( mi029, tvDet );
/* ---- Output row 0, term 3: mi040 = mi031 * ( mi032*mi033 - mi034*mi035 ) / tvDet ---- */
const register __m128 mi031 = _mm_shuffle_ps( miC1, miC1, _MM_PERM( 3, 3, 3, 2 ) );
const register __m128 mi032 = _mm_shuffle_ps( mi02, miR0, _MM_PERM( 2, 1, 2, 3 ) );
const register __m128 mi034 = _mm_shuffle_ps( mi02, miR0, _MM_PERM( 3, 0, 3, 2 ) );
const register __m128 mi033 = _mm_shuffle_ps( miR2, miR1, _MM_PERM( 3, 2, 3, 2 ) );
const register __m128 mi035 = _mm_shuffle_ps( miR2, miR1, _MM_PERM( 2, 3, 2, 3 ) );
const register __m128 mi036 = _mm_mul_ps( mi032, mi033 );
const register __m128 mi037 = _mm_mul_ps( mi034, mi035 );
const register __m128 mi038 = _mm_sub_ps( mi036, mi037 );
const register __m128 mi039 = _mm_mul_ps( mi031, mi038 );
const register __m128 mi040 = _mm_div_ps( mi039, tvDet );
/* ---- Output row 1, term 1: mi050 = mi041 * ( mi042*mi043 - mi044*mi045 ) / tvDet ---- */
const register __m128 mi041 = _mm_shuffle_ps( miC0, miC0, _MM_PERM( 1, 0, 0, 0 ) );
const register __m128 mi043 = _mm_shuffle_ps( miR3, mi04, _MM_PERM( 2, 3, 2, 1 ) );
const register __m128 mi045 = _mm_shuffle_ps( miR3, mi04, _MM_PERM( 3, 2, 3, 0 ) );
const register __m128 mi042 = _mm_shuffle_ps( miR2, miR1, _MM_PERM( 3, 2, 3, 2 ) );
const register __m128 mi044 = _mm_shuffle_ps( miR2, miR1, _MM_PERM( 2, 3, 2, 3 ) );
const register __m128 mi046 = _mm_mul_ps( mi042, mi043 );
const register __m128 mi047 = _mm_mul_ps( mi044, mi045 );
const register __m128 mi048 = _mm_sub_ps( mi046, mi047 );
const register __m128 mi049 = _mm_mul_ps( mi041, mi048 );
const register __m128 mi050 = _mm_div_ps( mi049, tvDet );
/* ---- Output row 1, term 2: mi060 = mi051 * ( mi052*mi053 - mi054*mi055 ) / tvDet ---- */
const register __m128 mi051 = _mm_shuffle_ps( miC0, miC0, _MM_PERM( 2, 2, 1, 1 ) );
const register __m128 mi053 = _mm_shuffle_ps( miR3, mi04, _MM_PERM( 3, 2, 3, 0 ) );
const register __m128 mi055 = _mm_shuffle_ps( miR3, mi04, _MM_PERM( 2, 3, 2, 1 ) );
const register __m128 mi052 = _mm_shuffle_ps( mi02, miR0, _MM_PERM( 2, 1, 2, 3 ) );
const register __m128 mi054 = _mm_shuffle_ps( mi02, miR0, _MM_PERM( 3, 0, 3, 2 ) );
const register __m128 mi056 = _mm_mul_ps( mi052, mi053 );
const register __m128 mi057 = _mm_mul_ps( mi054, mi055 );
const register __m128 mi058 = _mm_sub_ps( mi056, mi057 );
const register __m128 mi059 = _mm_mul_ps( mi051, mi058 );
const register __m128 mi060 = _mm_div_ps( mi059, tvDet );
/* ---- Output row 1, term 3: mi070 = mi061 * ( mi062*mi063 - mi064*mi065 ) / tvDet ---- */
const register __m128 mi061 = _mm_shuffle_ps( miC0, miC0, _MM_PERM( 3, 3, 3, 2 ) );
const register __m128 mi063 = _mm_shuffle_ps( miR2, miR1, _MM_PERM( 2, 3, 2, 3 ) );
const register __m128 mi065 = _mm_shuffle_ps( miR2, miR1, _MM_PERM( 3, 2, 3, 2 ) );
const register __m128 mi062 = _mm_shuffle_ps( mi02, miR0, _MM_PERM( 3, 0, 3, 2 ) );
const register __m128 mi064 = _mm_shuffle_ps( mi02, miR0, _MM_PERM( 2, 1, 2, 3 ) );
const register __m128 mi066 = _mm_mul_ps( mi062, mi063 );
const register __m128 mi067 = _mm_mul_ps( mi064, mi065 );
const register __m128 mi068 = _mm_sub_ps( mi066, mi067 );
const register __m128 mi069 = _mm_mul_ps( mi061, mi068 );
const register __m128 mi070 = _mm_div_ps( mi069, tvDet );
/* ---- Output row 2, term 1: mi080 = mi071 * ( mi072*mi073 - mi074*mi075 ) / tvDet ---- */
/* From here on the row-3 operands come from the mixes mi07 / mi08 rather than mi04. */
const register __m128 mi071 = _mm_shuffle_ps( miC0, miC0, _MM_PERM( 1, 0, 0, 0 ) );
const register __m128 mi073 = _mm_shuffle_ps( miR3, mi07, _MM_PERM( 3, 1, 3, 1 ) );
const register __m128 mi075 = _mm_shuffle_ps( miR3, mi08, _MM_PERM( 1, 3, 1, 3 ) );
const register __m128 mi072 = _mm_shuffle_ps( miR2, miR1, _MM_PERM( 1, 3, 1, 3 ) );
const register __m128 mi074 = _mm_shuffle_ps( miR2, miR1, _MM_PERM( 3, 1, 3, 1 ) );
const register __m128 mi076 = _mm_mul_ps( mi072, mi073 );
const register __m128 mi077 = _mm_mul_ps( mi074, mi075 );
const register __m128 mi078 = _mm_sub_ps( mi076, mi077 );
const register __m128 mi079 = _mm_mul_ps( mi071, mi078 );
const register __m128 mi080 = _mm_div_ps( mi079, tvDet );
/* ---- Output row 2, term 2: mi090 = mi081 * ( mi082*mi083 - mi084*mi085 ) / tvDet ---- */
const register __m128 mi081 = _mm_shuffle_ps( miC0, miC0, _MM_PERM( 2, 2, 1, 1 ) );
const register __m128 mi083 = _mm_shuffle_ps( miR3, mi08, _MM_PERM( 1, 3, 1, 3 ) );
const register __m128 mi085 = _mm_shuffle_ps( miR3, mi07, _MM_PERM( 3, 1, 3, 1 ) );
const register __m128 mi082 = _mm_shuffle_ps( mi05, miR0, _MM_PERM( 3, 1, 3, 1 ) );
const register __m128 mi084 = _mm_shuffle_ps( mi06, miR0, _MM_PERM( 1, 3, 1, 3 ) );
const register __m128 mi086 = _mm_mul_ps( mi082, mi083 );
const register __m128 mi087 = _mm_mul_ps( mi084, mi085 );
const register __m128 mi088 = _mm_sub_ps( mi086, mi087 );
const register __m128 mi089 = _mm_mul_ps( mi081, mi088 );
const register __m128 mi090 = _mm_div_ps( mi089, tvDet );
/* ---- Output row 2, term 3: mi100 = mi091 * ( mi092*mi093 - mi094*mi095 ) / tvDet ---- */
const register __m128 mi091 = _mm_shuffle_ps( miC0, miC0, _MM_PERM( 3, 3, 3, 2 ) );
const register __m128 mi093 = _mm_shuffle_ps( miR2, miR1, _MM_PERM( 3, 1, 3, 1 ) );
const register __m128 mi095 = _mm_shuffle_ps( miR2, miR1, _MM_PERM( 1, 3, 1, 3 ) );
const register __m128 mi092 = _mm_shuffle_ps( mi06, miR0, _MM_PERM( 1, 3, 1, 3 ) );
const register __m128 mi094 = _mm_shuffle_ps( mi05, miR0, _MM_PERM( 3, 1, 3, 1 ) );
const register __m128 mi096 = _mm_mul_ps( mi092, mi093 );
const register __m128 mi097 = _mm_mul_ps( mi094, mi095 );
const register __m128 mi098 = _mm_sub_ps( mi096, mi097 );
const register __m128 mi099 = _mm_mul_ps( mi091, mi098 );
const register __m128 mi100 = _mm_div_ps( mi099, tvDet );
/* ---- Output row 3, term 1: mi110 = mi101 * ( mi102*mi103 - mi104*mi105 ) / tvDet ---- */
const register __m128 mi101 = _mm_shuffle_ps( miC0, miC0, _MM_PERM( 1, 0, 0, 0 ) );
const register __m128 mi103 = _mm_shuffle_ps( miR3, mi08, _MM_PERM( 1, 2, 1, 2 ) );
const register __m128 mi105 = _mm_shuffle_ps( miR3, mi07, _MM_PERM( 2, 1, 2, 1 ) );
const register __m128 mi102 = _mm_shuffle_ps( miR2, miR1, _MM_PERM( 2, 1, 2, 1 ) );
const register __m128 mi104 = _mm_shuffle_ps( miR2, miR1, _MM_PERM( 1, 2, 1, 2 ) );
const register __m128 mi106 = _mm_mul_ps( mi102, mi103 );
const register __m128 mi107 = _mm_mul_ps( mi104, mi105 );
const register __m128 mi108 = _mm_sub_ps( mi106, mi107 );
const register __m128 mi109 = _mm_mul_ps( mi101, mi108 );
const register __m128 mi110 = _mm_div_ps( mi109, tvDet );
/* ---- Output row 3, term 2: mi120 = mi111 * ( mi112*mi113 - mi114*mi115 ) / tvDet ---- */
const register __m128 mi111 = _mm_shuffle_ps( miC0, miC0, _MM_PERM( 2, 2, 1, 1 ) );
const register __m128 mi113 = _mm_shuffle_ps( miR3, mi07, _MM_PERM( 2, 1, 2, 1 ) );
const register __m128 mi115 = _mm_shuffle_ps( miR3, mi08, _MM_PERM( 1, 2, 1, 2 ) );
const register __m128 mi112 = _mm_shuffle_ps( mi06, miR0, _MM_PERM( 1, 2, 1, 2 ) );
const register __m128 mi114 = _mm_shuffle_ps( mi05, miR0, _MM_PERM( 2, 1, 2, 1 ) );
const register __m128 mi116 = _mm_mul_ps( mi112, mi113 );
const register __m128 mi117 = _mm_mul_ps( mi114, mi115 );
const register __m128 mi118 = _mm_sub_ps( mi116, mi117 );
const register __m128 mi119 = _mm_mul_ps( mi111, mi118 );
const register __m128 mi120 = _mm_div_ps( mi119, tvDet );
/* ---- Output row 3, term 3: mi130 = mi121 * ( mi122*mi123 - mi124*mi125 ) / tvDet ---- */
const register __m128 mi121 = _mm_shuffle_ps( miC0, miC0, _MM_PERM( 3, 3, 3, 2 ) );
const register __m128 mi123 = _mm_shuffle_ps( miR2, miR1, _MM_PERM( 1, 2, 1, 2 ) );
const register __m128 mi125 = _mm_shuffle_ps( miR2, miR1, _MM_PERM( 2, 1, 2, 1 ) );
const register __m128 mi122 = _mm_shuffle_ps( mi05, miR0, _MM_PERM( 2, 1, 2, 1 ) );
const register __m128 mi124 = _mm_shuffle_ps( mi06, miR0, _MM_PERM( 1, 2, 1, 2 ) );
const register __m128 mi126 = _mm_mul_ps( mi122, mi123 );
const register __m128 mi127 = _mm_mul_ps( mi124, mi125 );
const register __m128 mi128 = _mm_sub_ps( mi126, mi127 );
const register __m128 mi129 = _mm_mul_ps( mi121, mi128 );
const register __m128 mi130 = _mm_div_ps( mi129, tvDet );
/* Each output row is the sum of its three terms; stores happen only after
   every value derived from *ptmM1 has been computed. */
ptmRet->m_atvRow[0].m_mData = _mm_add_ps( mi020, _mm_add_ps( mi030, mi040 ) );
ptmRet->m_atvRow[1].m_mData = _mm_add_ps( mi050, _mm_add_ps( mi060, mi070 ) );
ptmRet->m_atvRow[2].m_mData = _mm_add_ps( mi080, _mm_add_ps( mi090, mi100 ) );
ptmRet->m_atvRow[3].m_mData = _mm_add_ps( mi110, _mm_add_ps( mi120, mi130 ) );
};
/*
 * M_DET_F32_44 - reduce the 4x4 matrix *ptmM1 to a single scalar and return it
 * replicated into all four lanes of the result vector.
 *
 * Four partial vectors are formed, one per lane k of row 0:
 *
 *     term[k] = broadcast(row0 lane k) * ( shuffle_k(row1) * ( row2 (x) row3 )_k )
 *
 * where ( row2 (x) row3 )_k is a per-lane difference of products of shuffled
 * row-2 / row-3 lanes.  The terms are combined with alternating sign as
 * ( term0 - term1 ) + ( term2 - term3 ), and lanes 0, 1 and 2 of that vector
 * are summed (lane 0 + lane 1, then + lane 2).  The fourth lane of the
 * intermediates is not part of the sum.
 *
 * NOTE(review): assuming _MM_PERM( a, b, c, d ) selects source lanes for result
 * lanes 0..3 in that order (macro is defined outside this function), this is
 * the cofactor expansion of the determinant along row 0 - confirm.
 */
TgVEC_M_F32_04 M_DET_F32_44( CPCU_TgMAT_F32_44 ptmM1 )
{
    const __m128 vRow0 = ptmM1->m_atvRow[0].m_mData;
    const __m128 vRow1 = ptmM1->m_atvRow[1].m_mData;
    const __m128 vRow2 = ptmM1->m_atvRow[2].m_mData;
    const __m128 vRow3 = ptmM1->m_atvRow[3].m_mData;

    /* ---- Term for row-0 lane 0 ---- */
    const __m128 vCross0 = _mm_sub_ps(
        _mm_mul_ps( _mm_shuffle_ps( vRow2, vRow2, _MM_PERM( 2, 3, 1, 3 ) ),
                    _mm_shuffle_ps( vRow3, vRow3, _MM_PERM( 3, 1, 2, 3 ) ) ),
        _mm_mul_ps( _mm_shuffle_ps( vRow2, vRow2, _MM_PERM( 3, 1, 2, 3 ) ),
                    _mm_shuffle_ps( vRow3, vRow3, _MM_PERM( 2, 3, 1, 3 ) ) ) );
    const __m128 vTerm0 = _mm_mul_ps(
        _mm_shuffle_ps( vRow0, vRow0, _MM_PERM( 0, 0, 0, 0 ) ),
        _mm_mul_ps( _mm_shuffle_ps( vRow1, vRow1, _MM_PERM( 1, 2, 3, 3 ) ), vCross0 ) );

    /* ---- Term for row-0 lane 1 ---- */
    const __m128 vCross1 = _mm_sub_ps(
        _mm_mul_ps( _mm_shuffle_ps( vRow2, vRow2, _MM_PERM( 2, 3, 0, 3 ) ),
                    _mm_shuffle_ps( vRow3, vRow3, _MM_PERM( 3, 0, 2, 3 ) ) ),
        _mm_mul_ps( _mm_shuffle_ps( vRow2, vRow2, _MM_PERM( 3, 0, 2, 3 ) ),
                    _mm_shuffle_ps( vRow3, vRow3, _MM_PERM( 2, 3, 0, 3 ) ) ) );
    const __m128 vTerm1 = _mm_mul_ps(
        _mm_shuffle_ps( vRow0, vRow0, _MM_PERM( 1, 1, 1, 1 ) ),
        _mm_mul_ps( _mm_shuffle_ps( vRow1, vRow1, _MM_PERM( 0, 2, 3, 3 ) ), vCross1 ) );

    /* ---- Term for row-0 lane 2 ---- */
    const __m128 vCross2 = _mm_sub_ps(
        _mm_mul_ps( _mm_shuffle_ps( vRow2, vRow2, _MM_PERM( 1, 3, 0, 3 ) ),
                    _mm_shuffle_ps( vRow3, vRow3, _MM_PERM( 3, 0, 1, 3 ) ) ),
        _mm_mul_ps( _mm_shuffle_ps( vRow2, vRow2, _MM_PERM( 3, 0, 1, 3 ) ),
                    _mm_shuffle_ps( vRow3, vRow3, _MM_PERM( 1, 3, 0, 3 ) ) ) );
    const __m128 vTerm2 = _mm_mul_ps(
        _mm_shuffle_ps( vRow0, vRow0, _MM_PERM( 2, 2, 2, 2 ) ),
        _mm_mul_ps( _mm_shuffle_ps( vRow1, vRow1, _MM_PERM( 0, 1, 3, 3 ) ), vCross2 ) );

    /* ---- Term for row-0 lane 3 ---- */
    const __m128 vCross3 = _mm_sub_ps(
        _mm_mul_ps( _mm_shuffle_ps( vRow2, vRow2, _MM_PERM( 1, 2, 0, 3 ) ),
                    _mm_shuffle_ps( vRow3, vRow3, _MM_PERM( 2, 0, 1, 3 ) ) ),
        _mm_mul_ps( _mm_shuffle_ps( vRow2, vRow2, _MM_PERM( 2, 0, 1, 3 ) ),
                    _mm_shuffle_ps( vRow3, vRow3, _MM_PERM( 1, 2, 0, 3 ) ) ) );
    const __m128 vTerm3 = _mm_mul_ps(
        _mm_shuffle_ps( vRow0, vRow0, _MM_PERM( 3, 3, 3, 3 ) ),
        _mm_mul_ps( _mm_shuffle_ps( vRow1, vRow1, _MM_PERM( 0, 1, 2, 3 ) ), vCross3 ) );

    /* Alternating-sign combination; the grouping is kept as (t0 - t1) + (t2 - t3). */
    const __m128 vSigned = _mm_add_ps( _mm_sub_ps( vTerm0, vTerm1 ), _mm_sub_ps( vTerm2, vTerm3 ) );

    /* Horizontal sum of three lanes into the lowest lane:
       (lane0 + lane1) first, then + lane2. */
    const __m128 vSum01 = _mm_add_ss( vSigned, _mm_shuffle_ps( vSigned, vSigned, _MM_PERM( 1, 1, 1, 1 ) ) );
    const __m128 vSum012 = _mm_add_ss( vSum01, _mm_shuffle_ps( vSigned, vSigned, _MM_PERM( 2, 2, 2, 2 ) ) );

    /* Replicate the lowest lane across the whole vector. */
    return _mm_shuffle_ps( vSum012, vSum012, 0x00 );
}